self.fail('received events did not match expected events\nExpected:\n' + pprint.pformat(expected_events) + '\nReceived:\n' + pprint.pformat(events))
def check_parse_error(self, source):
    """Assert that parsing *source* raises ``sgmllib.SGMLParseError``.

    A fresh ``EventCollector`` is fed *source* and then closed.  If both
    calls complete without raising, the test is failed with a message that
    shows the offending source and the events the collector reports.
    """
    parser = EventCollector()
    try:
        parser.feed(source)
        parser.close()
    except sgmllib.SGMLParseError:
        # Expected outcome: the parser rejected the input.
        pass
    else:
        # Only a parse that finished cleanly counts as a failure.  Without
        # this ``else`` the helper would report a failure even when the
        # expected error had been raised.
        self.fail('expected SGMLParseError for %r\nReceived:\n%s'
                  % (source, pprint.pformat(parser.get_events())))
def test_doctype_decl_internal(self):
    # A DOCTYPE declaration that carries an internal subset is expected to
    # come back as a single 'decl' event whose payload is the text found
    # between '<!' and the closing '>'.
    decl_text = (
        "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'\n"
        " SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [\n"
        " <!ELEMENT html - O EMPTY>\n"
        " <!ATTLIST html\n"
        " version CDATA #IMPLIED\n"
        " profile CDATA 'DublinCore'>\n"
        " <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>\n"
        " <!ENTITY myEntity 'internal parsed entity'>\n"
        " <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>\n"
        " <!ENTITY % paramEntity 'name|name|name'>\n"
        " %paramEntity;\n"
        " <!-- comment -->\n"
        "]"
    )
    markup = '<!%s>' % (decl_text,)
    expected = [('decl', decl_text)]
    # The whole declaration is supplied as one chunk inside a list.
    self.check_events([markup], expected)
def test_doctype_decl_external(self):
    # A DOCTYPE declaration without an internal subset is expected to
    # produce exactly one 'decl' event holding the declaration text.
    # NOTE(review): the markup is handed to check_events as a bare string
    # rather than wrapped in a list -- confirm check_events accepts both.
    decl_text = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
    markup = '<!%s>' % (decl_text,)
    expected = [('decl', decl_text)]
    self.check_events(markup, expected)
def test_underscore_in_attrname(self):
    """Attribute names containing or starting with '_' must be accepted."""
    # Both attributes are valueless; the expected events pair each name
    # with itself as the value.
    attrs = [
        ('has_under', 'has_under'),
        ('_under', '_under'),
    ]
    expected = [('starttag', 'a', attrs)]
    self.check_events('<a has_under _under>', expected)
def test_underscore_in_tagname(self):
    """Tag names containing an underscore must be accepted."""
    tag = 'has_under'
    expected = [
        ('starttag', tag, []),
        ('endtag', tag),
    ]
    self.check_events('<has_under></has_under>', expected)
def test_quotes_in_unquoted_attrs(self):
    """Quote characters inside an unquoted attribute value stay in the value."""
    markup = '<a href=foo\'bar"baz>'
    # Both the single and the double quote are expected verbatim in href.
    href = ('href', 'foo\'bar"baz')
    expected = [('starttag', 'a', [href])]
    self.check_events(markup, expected)
def test_xhtml_empty_tag(self):
    """XHTML-style empty start tags (``<br />``) are handled."""
    # '<br />' is expected to yield only a start tag with no attributes,
    # followed by the text and the <i> element's start/end pair.
    expected = [
        ('starttag', 'br', []),
        ('data', 'text'),
        ('starttag', 'i', []),
        ('endtag', 'i'),
    ]
    self.check_events('<br />text<i></i>', expected)
def test_processing_instruction_only(self):
    # Input consisting solely of a processing instruction is expected to
    # yield one 'pi' event carrying the text between '<?' and '>'.
    markup = '<?processing instruction>'
    expected = [('pi', 'processing instruction')]
    self.check_events(markup, expected)
def test_bad_nesting(self):
    # Improperly nested tags: the expected events list the end tags in the
    # same order in which they appear in the input.
    markup = '<a><b></a></b>'
    expected = [
        ('starttag', 'a', []),
        ('starttag', 'b', []),
        ('endtag', 'a'),
        ('endtag', 'b'),
    ]
    self.check_events(markup, expected)
def test_bare_ampersands(self):
    # Ampersands that do not start an entity reference are expected to be
    # passed through unchanged as part of a single 'data' event.
    text = 'this text & contains & ampersands &'
    expected = [('data', text)]
    self.check_events(text, expected)
def test_bare_pointy_brackets(self):
    # '<' and '>' characters in this input are expected to be passed through
    # unchanged as a single 'data' event, with no tag events.
    text = 'this < text > contains < bare>pointy< brackets'
    expected = [('data', text)]
    self.check_events(text, expected)